# AMAR ET AL. - COUNTERING MISINFORMATION EARLY (2025)
## REPLICATION FILE: 15_robust_alt_spec.R
### This script estimates alternative model specifications for the main regressions.
# ----
# Run regressions ----
## Library FE ----
# function to tabulate regressions for each dv
tab_regression_clcdc <- function(varname, design) {
  fml <- as.formula(str_c(varname, "~treatment", "+ clcdc")) 
  mod <- svyglm(fml, design = design)
  # Extract observed values and weights
  y <- model.response(model.frame(mod))
  w <- weights(mod)
  
  # Calculate the weighted mean of the dependent variable
  weighted_mean_y <- sum(w * y) / sum(w)
  
  # Calculate Total Sum of Squares (TSS) with weights
  total_ss <- sum(w * (y - weighted_mean_y)^2)
  
  residual_ss <- sum(weights(mod)*residuals(mod)^2)
  survey_r_squared <- round(1 - (residual_ss / total_ss),3)
  
  out <- tidy(mod) %>%
    mutate(dv = varname, n = nobs(mod), r_squared = survey_r_squared, .before = 1)
  out
}


# Main outcomes
regressions_clcdc <-  lapply(dv_labs_endline_main$dv, \(x) tab_regression_clcdc(x, bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("clcdc", term) )
regressions_clcdc <- left_join(dv_labs_endline_main, regressions_clcdc, by = "dv")

regressions_clcdc <- regressions_clcdc %>%
  filter(type == "idx")

## District FE ----
# function to tabulate regressions for each dv
tab_regression_district <- function(varname, design) {
  fml <- as.formula(str_c(varname, "~treatment", "+ district_name")) 
  mod <- svyglm(fml, design = design)
  # Extract observed values and weights
  y <- model.response(model.frame(mod))
  w <- weights(mod)
  
  # Calculate the weighted mean of the dependent variable
  weighted_mean_y <- sum(w * y) / sum(w)
  
  # Calculate Total Sum of Squares (TSS) with weights
  total_ss <- sum(w * (y - weighted_mean_y)^2)
  
  residual_ss <- sum(weights(mod)*residuals(mod)^2)
  survey_r_squared <- round(1 - (residual_ss / total_ss),3)
  
  out <- tidy(mod) %>%
    mutate(dv = varname, n = nobs(mod), r_squared = survey_r_squared, .before = 1)
  out
}


# Main outcomes
regressions_district <-  lapply(dv_labs_endline_main$dv, \(x) tab_regression_district(x, bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("district", term) )
regressions_district <- left_join(dv_labs_endline_main, regressions_district, by = "dv")

regressions_district <- regressions_district %>%
  filter(type == "idx")

## District-Spillover FE ----
# function to tabulate regressions for each dv
tab_regression_district_spillover <- function(varname, design) {
  fml <- as.formula(str_c(varname, "~treatment", "+ district_spillover_pre")) 
  mod <- svyglm(fml, design = design)
  # Extract observed values and weights
  y <- model.response(model.frame(mod))
  w <- weights(mod)
  
  # Calculate the weighted mean of the dependent variable
  weighted_mean_y <- sum(w * y) / sum(w)
  
  # Calculate Total Sum of Squares (TSS) with weights
  total_ss <- sum(w * (y - weighted_mean_y)^2)
  
  residual_ss <- sum(weights(mod)*residuals(mod)^2)
  survey_r_squared <- round(1 - (residual_ss / total_ss),3)
  
  out <- tidy(mod) %>%
    mutate(dv = varname, n = nobs(mod), r_squared = survey_r_squared, .before = 1)
  out
}


# Main outcomes
regressions_district_spillover <-  lapply(dv_labs_endline_main$dv, \(x) tab_regression_district_spillover(x, bimli_svy_dkr.rm)) %>% # make a list of regression tables
  bind_rows %>%
  mutate(margin = qnorm(0.975) * std.error,
         lower = estimate - margin,
         upper = estimate + margin) %>%
  filter(term != "(Intercept)" & !grepl("district_spillover_pre", term) )
regressions_district_spillover <- left_join(dv_labs_endline_main, regressions_district_spillover, by = "dv")

regressions_district_spillover <- regressions_district_spillover %>%
  filter(type == "idx")

# Create tables ----
## Library FEs ----
model_labels <- unique(regressions_clcdc$label)
model_labels <- model_labels[model_labels != "Civic Participation Index"]

# Relocate "Awareness Index" to the end
if("Awareness Index" %in% model_labels) {
  # Find the position of "Awareness Index"
  awareness_idx <- which(model_labels == "Awareness Index")
  # Remove "Awareness Index" from its original position
  model_labels <- model_labels[-awareness_idx]
  # Add it back at the end
  model_labels <- c(model_labels, "Awareness Index")
}

n_values <- sapply(model_labels, function(lbl) {
  # Get the first N value for each model (assuming it's consistent within models)
  n_val <- regressions_clcdc %>% 
    filter(label == lbl) %>% 
    pull(n) %>% 
    first()
  return(n_val)
})

# Extract R-squared values for each model
r2_values <- sapply(model_labels, function(lbl) {
  # Get the R-squared value for each model
  r2_val <- regressions_clcdc %>% 
    filter(label == lbl) %>% 
    pull(r_squared) %>% 
    first()
  return(r2_val)
})

# Format R² values to match current precision
r2_formatted <- sprintf("%.3f", r2_values)

# Create the dynamic string for N and R² rows
n_row <- paste("N &", paste(n_values, collapse = " & "), "\\\\ \n")
r2_row <- paste("R2 &", paste(r2_formatted, collapse = " & "), "\\\\ \n")


regressions_clcdc_tab <- regressions_clcdc %>%
  select(label, term, estimate, std.error,p.value) %>%
  arrange(term)

regressions_clcdc_tab <- regressions_clcdc_tab %>%
  filter(label != "Civic Participation Index") %>%
  pivot_wider(
    names_from = label,
    values_from = c(estimate, std.error, p.value)
  )

regressions_clcdc_tab <- regressions_clcdc_tab %>%
  pivot_longer(
    cols = starts_with("estimate_") | starts_with("std.error_") | starts_with("p.value_"),
    names_to = c("stat", "label"),
    names_sep = "_",
    values_to = "value",
    values_transform = list(value = as.character)  # Convert to character to handle stars and numeric together
  )


p_values_reg_clcdc <- regressions_clcdc_tab %>%
  filter(stat == "p.value") %>%
  select(label, term, p.value = value)  # Rename `value` to `p.value` for clarity

regressions_clcdc_tab <- regressions_clcdc_tab %>%
  left_join(p_values_reg_clcdc, by = c("label", "term"))

regressions_clcdc_tab <- regressions_clcdc_tab %>%
  mutate(
    # Round and add significance stars based on `p.value`
    value = case_when(
      stat == "estimate" ~ paste0(sprintf("%.3f", as.numeric(value)),
                                  case_when(
                                    as.numeric(p.value) < 0.001 ~ "***",
                                    as.numeric(p.value) < 0.01 ~ "**",
                                    as.numeric(p.value) < 0.05 ~ "*",
                                    TRUE ~ ""
                                  )
      ),
      stat == "std.error" ~ sprintf("%.4f", as.numeric(value)),
      stat == "p.value" ~ sprintf("%.4f", as.numeric(value))
    )
  ) %>%
  select(-p.value)  %>%
  # Pivot to make `estimate`, `std.error`, and `p.value` columns wide
  pivot_wider(
    names_from = label,
    values_from = value
  ) %>%
  # Filter out rows where `stat` is `p.value`
  filter(stat != "p.value")  %>%
  # Correct way to drop the `p.value` column
  mutate(
    # Format std.error with parentheses and ensure estimate retains significance stars
    across(starts_with("Awareness Index"):starts_with("Engagement Behavior Index"),
           ~ ifelse(stat == "estimate", ., paste0("(", ., ")"))
    )
  ) %>%
  # Arrange terms in specified order
  arrange(
    match(term, c(
      "treatmentMedia Literacy", "spillover"
    )),
    stat
  ) %>%
  mutate(
    # Remove `term` for p.value rows and rename variables
    term = ifelse(stat == "std.error", "", term)
  ) %>%
  select(-stat) %>%
  rename(Outcome = term) %>%
  mutate(Outcome = case_when(
    Outcome == "treatmentMedia Literacy" ~ "Treatment",
    Outcome == "spillover" ~ "Indicator for  low-spillover stratum",
    TRUE ~ Outcome  # Keep other values unchanged
  )) %>%
  rename_with(~ gsub("Index$", "", .), ends_with("Index"))


regressions_clcdc_tab <- regressions_clcdc_tab %>%
  relocate("Awareness ", .after = last_col())

regressions_clcdc_tab <- regressions_clcdc_tab %>%
  select(Outcome, everything())

# Ensure no row names in final_data
rownames(regressions_clcdc_tab) <- NULL

# Remove existing column names from final_data to avoid default headers in LaTeX output
colnames(regressions_clcdc_tab) <- NULL


# Generate LaTeX table with xtable
latex_table_regression_clcdc <- xtable(regressions_clcdc_tab, 
                                       caption = "Main results with library FE", 
                                       label = "tab:alt_spec_clcdc_fe")

# Manually specify header rows for two-line headers
header_row1_regression_clcdc <- c("Outcome",  "Accuracy", "Sharing", 
                                  "Health", "Source", "Engagement", "Engagement", "Awareness")
header_row2_regression_clcdc <- c("",  "Discernment", "Discernment", "Preferences", "Discernment", "Attitude", "Behavior", "")


# Create the LaTeX code for the two-line headers with a single \hline after "Behavior" and no extra lines
header_latex_clcdc <- paste(
  paste(header_row1_regression_clcdc, collapse = " & "), "\\\\",
  paste(header_row2_regression_clcdc, collapse = " & "), "\\\\",
  "\\hline"
)

# Calculate the number of columns and rows as needed
num_cols_reg_clcdc <- ncol(regressions_clcdc_tab) - 1  # Adjust for the alignment
num_rows_reg_clcdc <- nrow(latex_table_regression_clcdc)     # Ensure numeric row count


# Footer
command2 <- paste0(
  "\\hline\n", 
  n_row, 
  r2_row, 
  "Library FEs&  Yes &  Yes &  Yes &  Yes &  Yes &  Yes &  Yes \\\\ \n",
  "\\hline\n",
  "\\multicolumn{", num_cols_reg_clcdc + 1, "}{l}{*p$<$0.05; **p$<$0.01; ***p$<$0.001. SEs clustered at the village level in parentheses.} \\\\ \n"
)
# Capture the LaTeX output as a character vector without row names
latex_output_reg_clcdc <- capture.output(print(latex_table_regression_clcdc, type = "latex", include.rownames = FALSE, 
                                               sanitize.text.function = identity, 
                                               align = c("l", "l", rep("c", num_cols_reg_clcdc)),  # Use calculated number of columns
                                               add.to.row = list(
                                                 pos = list(0, num_rows_reg_clcdc),  # Use calculated row count
                                                 command = c(
                                                   header_latex_clcdc, command2)
                                               )
))

# Find the index of `\centering` and insert `\small` right after it
centering_index_clcdc <- which(grepl("\\centering", latex_output_reg_clcdc))
latex_output_reg_clcdc <- append(latex_output_reg_clcdc, 
                                 "\\resizebox{\\linewidth}{!}{%", 
                                 after = centering_index_clcdc)

# Replace `\end{tabular}` with `%\n}`
latex_output_reg_clcdc <- sub("\\\\end\\{tabular\\}", 
                              "\\\\hline\n\\\\end\\{tabular\\}%\n}",
                              latex_output_reg_clcdc)

# Remove extra space before table
latex_output_reg_clcdc[which(latex_output_reg_clcdc == " \\\\ ")] <- "  \\hline"

# Write the captured output to the file
writeLines(latex_output_reg_clcdc, 
           "output/tables/tab_alt_spec_clcdc_fe.tex")

## District FEs ----
model_labels <- unique(regressions_district$label)
model_labels <- model_labels[model_labels != "Civic Participation Index"]

# Relocate "Awareness Index" to the end
if("Awareness Index" %in% model_labels) {
  # Find the position of "Awareness Index"
  awareness_idx <- which(model_labels == "Awareness Index")
  # Remove "Awareness Index" from its original position
  model_labels <- model_labels[-awareness_idx]
  # Add it back at the end
  model_labels <- c(model_labels, "Awareness Index")
}



n_values <- sapply(model_labels, function(lbl) {
  # Get the first N value for each model (assuming it's consistent within models)
  n_val <- regressions_district %>% 
    filter(label == lbl) %>% 
    pull(n) %>% 
    first()
  return(n_val)
})

# Extract R-squared values for each model
r2_values <- sapply(model_labels, function(lbl) {
  # Get the R-squared value for each model
  r2_val <- regressions_district %>% 
    filter(label == lbl) %>% 
    pull(r_squared) %>% 
    first()
  return(r2_val)
})

# Format R² values to match current precision
r2_formatted <- sprintf("%.3f", r2_values)

# Create the dynamic string for N and R² rows
n_row <- paste("N &", paste(n_values, collapse = " & "), "\\\\ \n")
r2_row <- paste("R2 &", paste(r2_formatted, collapse = " & "), "\\\\ \n")

regressions_district_tab <- regressions_district %>%
  select(label, term, estimate, std.error,p.value) %>%
  arrange(term)

regressions_district_tab <- regressions_district_tab %>%
  filter(label != "Civic Participation Index") %>%
  pivot_wider(
    names_from = label,
    values_from = c(estimate, std.error, p.value)
  )

regressions_district_tab <- regressions_district_tab %>%
  pivot_longer(
    cols = starts_with("estimate_") | starts_with("std.error_") | starts_with("p.value_"),
    names_to = c("stat", "label"),
    names_sep = "_",
    values_to = "value",
    values_transform = list(value = as.character)  # Convert to character to handle stars and numeric together
  )

p_values_reg_district <- regressions_district_tab %>%
  filter(stat == "p.value") %>%
  select(label, term, p.value = value)  # Rename `value` to `p.value` for clarity

regressions_district_tab <- regressions_district_tab %>%
  left_join(p_values_reg_district, by = c("label", "term"))

regressions_district_tab <- regressions_district_tab %>%
  mutate(
    # Round and add significance stars based on `p.value`
    value = case_when(
      stat == "estimate" ~ paste0(sprintf("%.3f", as.numeric(value)),
                                  case_when(
                                    as.numeric(p.value) < 0.001 ~ "***",
                                    as.numeric(p.value) < 0.01 ~ "**",
                                    as.numeric(p.value) < 0.05 ~ "*",
                                    TRUE ~ ""
                                  )
      ),
      stat == "std.error" ~ sprintf("%.4f", as.numeric(value)),
      stat == "p.value" ~ sprintf("%.4f", as.numeric(value))
    )
  ) %>%
  select(-p.value)  %>%
  # Pivot to make `estimate`, `std.error`, and `p.value` columns wide
  pivot_wider(
    names_from = label,
    values_from = value
  ) %>%
  # Filter out rows where `stat` is `p.value`
  filter(stat != "p.value")  %>%
  # Correct way to drop the `p.value` column
  mutate(
    # Format std.error with parentheses and ensure estimate retains significance stars
    across(starts_with("Awareness Index"):starts_with("Engagement Behavior Index"),
           ~ ifelse(stat == "estimate", ., paste0("(", ., ")"))
    )
  ) %>%
  # Arrange terms in specified order
  arrange(
    match(term, c(
      "treatmentMedia Literacy", "spillover"
    )),
    stat
  ) %>%
  mutate(
    # Remove `term` for p.value rows and rename variables
    term = ifelse(stat == "std.error", "", term)
  ) %>%
  select(-stat) %>%
  rename(Outcome = term) %>%
  mutate(Outcome = case_when(
    Outcome == "treatmentMedia Literacy" ~ "Treatment",
    Outcome == "spillover" ~ "Indicator for  low-spillover stratum",
    TRUE ~ Outcome  # Keep other values unchanged
  )) %>%
  rename_with(~ gsub("Index$", "", .), ends_with("Index"))


regressions_district_tab <- regressions_district_tab %>%
  relocate("Awareness ", .after = last_col())

regressions_district_tab <- regressions_district_tab %>%
  select(Outcome, everything())

# Ensure no row names in final_data
rownames(regressions_district_tab) <- NULL

# Remove existing column names from final_data to avoid default headers in LaTeX output
colnames(regressions_district_tab) <- NULL

# Generate LaTeX table with xtable
latex_table_regression_district <- xtable(regressions_district_tab, 
                                          caption = "Main results with district FE", 
                                          label = "tab:alt_spec_district_fe")

# Manually specify header rows for two-line headers
header_row1_regression_district <- c("Outcome",  "Accuracy", "Sharing", 
                                     "Health", "Source", "Engagement", "Engagement", "Awareness")
header_row2_regression_district <- c("",  "Discernment", "Discernment", "Preferences", "Discernment", "Attitude", "Behavior", "")

# Create the LaTeX code for the two-line headers with a single \hline after "Behavior" and no extra lines
header_latex_district <- paste(
  paste(header_row1_regression_district, collapse = " & "), "\\\\",
  paste(header_row2_regression_district, collapse = " & "), "\\\\"
)

# Calculate the number of columns and rows as needed
num_cols_reg_district <- ncol(regressions_district_tab) - 1  # Adjust for the alignment
num_rows_reg_district <- nrow(latex_table_regression_district)     # Ensure numeric row count

# Footer
command2 <- paste0(
  "\\hline\n", 
  n_row, 
  r2_row, 
  "District FEs&  Yes &  Yes &  Yes &  Yes &  Yes &  Yes &  Yes \\\\ \n",
  "\\hline\n",
  "\\multicolumn{", num_cols_reg_district + 1, "}{l}{*p$<$0.05; **p$<$0.01; ***p$<$0.001. SEs clustered at the village level in parentheses.} \\\\ \n"
)

# Capture the LaTeX output as a character vector without row names
latex_output_reg_district <- capture.output(print(latex_table_regression_district, type = "latex", include.rownames = FALSE, 
                                                  sanitize.text.function = identity, 
                                                  align = c("l", "l", rep("c", num_cols_reg_district)),  # Use calculated number of columns
                                                  add.to.row = list(
                                                    pos = list(0, num_rows_reg_district),  # Use calculated row count
                                                    command = c(
                                                      header_latex_district,command2)
                                                  )
))

# Find the index of `\centering` and insert `\small` right after it
centering_index_district <- which(grepl("\\centering", latex_output_reg_district))
latex_output_reg_district <- append(latex_output_reg_district, 
                                    "\\resizebox{\\linewidth}{!}{%", 
                                    after = centering_index_district)

# Replace `\end{tabular}` with `%\n}`
latex_output_reg_district <- sub("\\\\end\\{tabular\\}", 
                                 "\\\\hline\n\\\\end\\{tabular\\}%\n}",
                                 latex_output_reg_district)

# Remove extra space before table
latex_output_reg_district[which(latex_output_reg_district == " \\\\ ")] <- "  \\hline"

# Write the captured output to the file
writeLines(latex_output_reg_district, 
           "output/tables/tab_alt_spec_district_fe.tex")


## District-Spillover FEs ----
model_labels <- unique(regressions_district_spillover$label)
model_labels <- model_labels[model_labels != "Civic Participation Index"]

# Relocate "Awareness Index" to the end
if("Awareness Index" %in% model_labels) {
  # Find the position of "Awareness Index"
  awareness_idx <- which(model_labels == "Awareness Index")
  # Remove "Awareness Index" from its original position
  model_labels <- model_labels[-awareness_idx]
  # Add it back at the end
  model_labels <- c(model_labels, "Awareness Index")
}

n_values <- sapply(model_labels, function(lbl) {
  # Get the first N value for each model (assuming it's consistent within models)
  n_val <- regressions_district_spillover %>% 
    filter(label == lbl) %>% 
    pull(n) %>% 
    first()
  return(n_val)
})

# Extract R-squared values for each model
r2_values <- sapply(model_labels, function(lbl) {
  # Get the R-squared value for each model
  r2_val <- regressions_district_spillover %>% 
    filter(label == lbl) %>% 
    pull(r_squared) %>% 
    first()
  return(r2_val)
})

# Format R² values to match current precision
r2_formatted <- sprintf("%.3f", r2_values)

# Create the dynamic string for N and R² rows
n_row <- paste("N &", paste(n_values, collapse = " & "), "\\\\ \n")
r2_row <- paste("R2 &", paste(r2_formatted, collapse = " & "), "\\\\ \n")

regressions_district_spillover_tab <- regressions_district_spillover %>%
  select(label, term, estimate, std.error,p.value) %>%
  arrange(term)

regressions_district_spillover_tab <- regressions_district_spillover_tab %>%
  filter(label != "Civic Participation Index") %>%
  pivot_wider(
    names_from = label,
    values_from = c(estimate, std.error, p.value)
  )

regressions_district_spillover_tab <- regressions_district_spillover_tab %>%
  pivot_longer(
    cols = starts_with("estimate_") | starts_with("std.error_") | starts_with("p.value_"),
    names_to = c("stat", "label"),
    names_sep = "_",
    values_to = "value",
    values_transform = list(value = as.character)  # Convert to character to handle stars and numeric together
  )

p_values_reg_district_spillover <- regressions_district_spillover_tab %>%
  filter(stat == "p.value") %>%
  select(label, term, p.value = value)  # Rename `value` to `p.value` for clarity

regressions_district_spillover_tab <- regressions_district_spillover_tab %>%
  left_join(p_values_reg_district_spillover, by = c("label", "term"))

regressions_district_spillover_tab <- regressions_district_spillover_tab %>%
  mutate(
    # Round and add significance stars based on `p.value`
    value = case_when(
      stat == "estimate" ~ paste0(sprintf("%.3f", as.numeric(value)),
                                  case_when(
                                    as.numeric(p.value) < 0.001 ~ "***",
                                    as.numeric(p.value) < 0.01 ~ "**",
                                    as.numeric(p.value) < 0.05 ~ "*",
                                    TRUE ~ ""
                                  )
      ),
      stat == "std.error" ~ sprintf("%.4f", as.numeric(value)),
      stat == "p.value" ~ sprintf("%.4f", as.numeric(value))
    )
  ) %>%
  select(-p.value)  %>%
  # Pivot to make `estimate`, `std.error`, and `p.value` columns wide
  pivot_wider(
    names_from = label,
    values_from = value
  ) %>%
  # Filter out rows where `stat` is `p.value`
  filter(stat != "p.value")  %>%
  # Correct way to drop the `p.value` column
  mutate(
    # Format std.error with parentheses and ensure estimate retains significance stars
    across(starts_with("Awareness Index"):starts_with("Engagement Behavior Index"),
           ~ ifelse(stat == "estimate", ., paste0("(", ., ")"))
    )
  ) %>%
  # Arrange terms in specified order
  arrange(
    match(term, c(
      "treatmentMedia Literacy")),
    stat
  ) %>%
  mutate(
    # Remove `term` for p.value rows and rename variables
    term = ifelse(stat == "std.error", "", term)
  ) %>%
  select(-stat) %>%
  rename(Outcome = term) %>%
  mutate(Outcome = case_when(
    Outcome == "treatmentMedia Literacy" ~ "Treatment",
    TRUE ~ Outcome  # Keep other values unchanged
  )) %>%
  rename_with(~ gsub("Index$", "", .), ends_with("Index"))


regressions_district_spillover_tab <- regressions_district_spillover_tab %>%
  relocate("Awareness ", .after = last_col())

regressions_district_spillover_tab <- regressions_district_spillover_tab %>%
  select(Outcome, everything())

# Ensure no row names in final_data
rownames(regressions_district_spillover_tab) <- NULL

# Remove existing column names from final_data to avoid default headers in LaTeX output
colnames(regressions_district_spillover_tab) <- NULL


# Generate LaTeX table with xtable
latex_table_regression_district_spillover <- xtable(regressions_district_spillover_tab, 
                                                    caption = "Main results with district-spillover FE", 
                                                    label = "tab:alt_spec_district_spillover_fe")

# Manually specify header rows for two-line headers
header_row1_regression_district_spillover <- c("Outcome",  "Accuracy", "Sharing", 
                                               "Health", "Source", "Engagement", "Engagement", "Awareness")
header_row2_regression_district_spillover <- c("",  "Discernment", "Discernment", "Preferences", "Discernment", "Attitude", "Behavior", "")


# Create the LaTeX code for the two-line headers with a single \hline after "Behavior" and no extra lines
header_latex_district_spillover <- paste(
  paste(header_row1_regression_district_spillover, collapse = " & "), "\\\\",
  paste(header_row2_regression_district_spillover, collapse = " & "), "\\\\"
)

# Calculate the number of columns and rows as needed
num_cols_reg_district_spillover <- ncol(regressions_district_spillover_tab) - 1  # Adjust for the alignment
num_rows_reg_district_spillover <- nrow(latex_table_regression_district_spillover)     # Ensure numeric row count

# Footer
command2 <- paste0(
  "\\hline\n", 
  n_row, 
  r2_row, 
  "District-Spillover FEs&  Yes &  Yes &  Yes &  Yes &  Yes &  Yes &  Yes \\\\ \n",
  "\\hline\n",
  "\\multicolumn{", num_cols_reg_district_spillover + 1, "}{l}{*p$<$0.05; **p$<$0.01; ***p$<$0.001. SEs clustered at the village level in parentheses.} \\\\ \n"
)


# Capture the LaTeX output as a character vector without row names
latex_output_reg_district_spillover <- capture.output(print(latex_table_regression_district_spillover, type = "latex", include.rownames = FALSE, 
                                                            sanitize.text.function = identity, 
                                                            align = c("l", "l", rep("c", num_cols_reg_district_spillover)),  # Use calculated number of columns
                                                            add.to.row = list(
                                                              pos = list(0, num_rows_reg_district_spillover),  # Use calculated row count
                                                              command = c(
                                                                header_latex_district_spillover,command2
                                                              )
                                                            )
))

# Find the index of `\centering` and insert `\small` right after it
centering_index_district_spillover <- which(grepl("\\centering", latex_output_reg_district_spillover))
latex_output_reg_district_spillover <- append(latex_output_reg_district_spillover, 
                                              "\\resizebox{\\linewidth}{!}{%", 
                                              after = centering_index_district_spillover)

# Replace `\end{tabular}` with `%\n}`
latex_output_reg_district_spillover <- sub("\\\\end\\{tabular\\}", 
                                           "\\\\hline\n\\\\end\\{tabular\\}%\n}", 
                                           latex_output_reg_district_spillover)

# Remove extra space before table
latex_output_reg_district_spillover[which(latex_output_reg_district_spillover == " \\\\ ")] <- "  \\hline"

# Write the captured output to the file
writeLines(latex_output_reg_district_spillover, 
           "output/tables/tab_alt_spec_district_spillover_fe.tex")

# END of 15_robust_alt_spec.R ----